import numpy
import pandas
from matplotlib import pyplot
# Number of leading principal components shown in the cumulative-variance plot.
MAX_COMPONENTS_SHOWN = 20


def fill_missing_with_column_mean(frame):
    """Return a copy of *frame* with each NaN replaced by its column's mean.

    The input frame is not modified.  A column that consists only of NaN has
    a NaN mean and is therefore left unchanged.
    """
    # One vectorised call instead of a per-column ``inplace`` fill on an
    # ``iloc`` slice: the latter is chained assignment and does not update
    # the parent frame when pandas Copy-on-Write is active.
    return frame.fillna(frame.mean())


def pca(samples):
    """Run principal component analysis on *samples*.

    Parameters:
        samples: 2-D array-like with one observation per row and one
            feature per column.

    Returns:
        A tuple ``(value, vector, new_data)`` where ``value`` holds the
        eigenvalues of the covariance matrix in descending order, the
        columns of ``vector`` are the matching eigenvectors, and
        ``new_data`` is the mean-centred data expressed in that basis.
        The sign of each eigenvector is arbitrary.
    """
    samples = numpy.asarray(samples, dtype=numpy.float64)

    # Remove the per-feature mean.
    centered = samples - numpy.mean(samples, axis=0, dtype=numpy.float64)

    # Compute the covariance matrix; atleast_2d keeps the single-feature
    # case (where numpy.cov returns a 0-d array) usable below.
    cov = numpy.atleast_2d(
        numpy.cov(centered, rowvar=False, dtype=numpy.float64)
    )

    # The covariance matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues/eigenvectors, whereas eig may return complex values
    # caused purely by floating-point round-off.
    value, vector = numpy.linalg.eigh(cov)

    # Sort components by decreasing eigenvalue.
    order = numpy.argsort(value)[::-1]
    value = value[order]
    vector = vector[:, order]

    # Coordinates of the data in the new coordinate system.
    new_data = centered @ vector
    return value, vector, new_data


def cumulative_variance_ratio(value):
    """Return the running sum of *value* divided by its total."""
    value = numpy.asarray(value, dtype=numpy.float64)
    return numpy.cumsum(value) / numpy.sum(value)


if __name__ == "__main__":
    # Space-separated file without a header row; 'NaN' marks missing values.
    data = pandas.read_table(
        './secom.data',
        sep=' ',
        encoding='utf-8',
        header=None,
        na_values='NaN',
    )
    data = fill_missing_with_column_mean(data).values

    value, vector, new_data = pca(data)
    percent = cumulative_variance_ratio(value)

    # Clamp to the number of available components so x and y always have
    # the same length, even with fewer than MAX_COMPONENTS_SHOWN features.
    shown = min(MAX_COMPONENTS_SHOWN, percent.size)
    pyplot.plot(list(range(1, shown + 1)), percent[:shown])
    pyplot.show()